# ================================ 1. Load the data ==============================================
from datasets import load_dataset, load_from_disk

# # Load the dataset online and save it explicitly
# path = "lansinuote/ChnSentiCorp"
# dataset = load_dataset(path=path)  # the downloaded copy is stored under the .cache folder
#
# # Export the train split to CSV
# dataset["train"].to_csv(path_or_buf=r"D:\code\base_learn\learn_ai\hugging_face\data\ChnSentiCorp.csv")
# # Save the whole dataset to a local directory
# save_path = r"D:\code\base_learn\learn_ai\hugging_face\saved_dataset"
# dataset.save_to_disk(save_path)

# # Load the saved dataset offline (uses save_path from the commented-out block above)
# dataset = load_from_disk(save_path)
# print(dataset)
#
# train_data = dataset["train"]
# for data in train_data:
#     print(data)

# Load the dataset from the locally exported CSV file.
csv_path = r"D:\code\base_learn\learn_ai\hugging_face\data\ChnSentiCorp.csv"
dataset = load_dataset("csv", data_files=csv_path)

# Show the dataset summary first, then dump every record of the "train" split.
print(dataset)
train_data = dataset["train"]
for data in train_data:
    print(data)
